package com.bigdata.core.action

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demonstrates `foreachPartition`: the supplied function is invoked once per
 * partition and receives an iterator over that partition's elements.
 * `foreachPartition` is an action, so it triggers job execution.
 */
object Demo7_foreachPartition {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local")
    conf.setAppName("foreachPartition")
    val sc = new SparkContext(conf)
    sc.setLogLevel("error")

    try {
      // 7 elements spread across 3 partitions, so the start/end markers
      // below print three times — once per partition.
      val infos = sc.parallelize(List[String]("a", "b", "c", "d", "e", "f", "g"), 3)

      infos.foreachPartition(iter => {
        println("process partition info start...")
        iter.foreach(elem => {
          println("processing ..." + elem)
        })
        println("process partition info end...")
      })
    } finally {
      // Always release the SparkContext, even if the job fails;
      // leaving it open leaks the local Spark runtime.
      sc.stop()
    }
  }
}
